#===================================================================================================#
# Title: Determined by Mode? Representation and Measurement Effects in a Dual Mode Statewide Survey
# Authors: Enrijeta Shino, Michael D. Martinez, and Michael Binder
# Journal: Journal of Survey Statistics and Methodology
# Year: 2021
#===================================================================================================#

# Start from an empty workspace so objects left over from an earlier session
# cannot leak into the replication results.
rm(list = ls())

# Replication folder on the authors' machine. Switch to it only when it
# exists; elsewhere keep the current working directory (which must then hold
# the replication data files) instead of aborting on a failed setwd().
replication_dir <- "/Users/Enrijeta/Dropbox/JSSAM2021_RepicationFiles/JSSAM21_Data&Code"
if (dir.exists(replication_dir)) {
  setwd(replication_dir)
} else {
  warning("Replication directory not found; using working directory: ",
          getwd(), call. = FALSE)
}

# Libraries
library(readstata13)
library(car)
library(Hmisc)
library(weights)
library(descr)
library(dplyr)
library(plyr)
library(data.table)
library(gdata)
require(MASS)
require(reshape2)
library(mlogit)
library(zipcode)
library(psych)
library(effsize)
library(sandwich)
library(survey)


# Load datasets
# The first two files are saved R workspaces; the third is a Stata file
# that is read into `dat3`.
for (rdata_file in c("dat1.Rdata", "dat2.Rdata")) {
  load(rdata_file)
}
dat3 <- read.dta13(file = "Population from which sample was drawn.dta")


#=========================#
# Voter File Descriptives #
#=========================#
load("voterfile.Rdata")

# Registration year: parse the MM/DD/YYYY registration date and keep the
# four-digit year as text, so the raw parse can be inspected first.
voterfile$reg_year <- local({
  reg_date <- strptime(voterfile$registration8, format = "%m/%d/%Y", tz = "")
  format(as.POSIXct(reg_date), format = "%Y")
})

table(voterfile$reg_year)


# Registration year as a number
voterfile$reg_year <- as.numeric(voterfile$reg_year)
table(voterfile$reg_year)


# Years registered, counted back from 2017
voterfile$reg_inyears <- 2017 - voterfile$reg_year
table(voterfile$reg_inyears)
summary(voterfile$reg_inyears)


# Gender: 0 = female, 1 = male, 2 = unassigned (any other value stays NA)
voterfile$gender <- with(voterfile, dplyr::case_when(
  sex == "F" ~ 0,
  sex == "M" ~ 1,
  sex == "U" ~ 2
))
table(voterfile$gender)


# Race: 1 = white, 2 = black, 3 = hispanic, 4 = other/unknown
# (a missing race code stays NA; every other code falls into 4)
voterfile$race1 <- with(voterfile, dplyr::case_when(
  race == 5 ~ 1,
  race == 3 ~ 2,
  race == 4 ~ 3,
  !is.na(race) ~ 4
))
table(voterfile$race1)


# PID: 1 = Democrat, 2 = Republican, 3 = NPA, 4 = Other/Independent
voterfile$pid <- with(voterfile, dplyr::case_when(
  partyreg == "DEM" ~ 1,
  partyreg == "REP" ~ 2,
  partyreg == "NPA" ~ 3,
  !is.na(partyreg) ~ 4
))
table(voterfile$pid)


# Age: 1 = 18-29, 2 = 30-44, 3 = 45-59, 4 = 60+
# Ages of 17 or less and of 105 or more are treated as missing before binning.
voterfile$age1 <- local({
  age_valid <- voterfile$age
  age_valid[age_valid <= 17 | age_valid >= 105] <- NA
  as.numeric(cut(age_valid, breaks = c(18, 30, 45, 60, Inf),
                 right = FALSE, labels = FALSE))
})
table(voterfile$age1)



#=============================#
# Sampling Frame Descriptives #
#=============================#
dat3 <- data.table(read.dta13("Population from which sample was drawn.dta"))

# Gender: 0 = female, 1 = male, 2 = unassigned (any other value stays NA)
dat3[, gender := dplyr::case_when(
  sex == "F" ~ 0,
  sex == "M" ~ 1,
  sex == "U" ~ 2
)]
table(dat3$gender)


# Race: 1 = white, 2 = black, 3 = hispanic, 4 = other/unknown
dat3[, race1 := dplyr::case_when(
  race == 5 ~ 1,
  race == 3 ~ 2,
  race == 4 ~ 3,
  !is.na(race) ~ 4
)]
table(dat3$race1)


# PID: 1 = Democrat, 2 = Republican, 3 = NPA, 4 = Other/Independent
dat3[, pid := dplyr::case_when(
  partyreg == "DEM" ~ 1,
  partyreg == "REP" ~ 2,
  partyreg == "NPA" ~ 3,
  !is.na(partyreg) ~ 4
)]
table(dat3$pid)


# Age: 1 = 18-29, 2 = 30-44, 3 = 45-59, 4 = 60+
# Anything below 18 (including the 16- and 17-year-olds) falls outside the
# first bin and is therefore NA.
dat3[, age1 := as.numeric(cut(age, breaks = c(18, 30, 45, 60, Inf),
                              right = FALSE, labels = FALSE))]
table(dat3$age1)


# Voted in 2016: 1 = yes; 0 = no
table(dat3$gen2016)


# Registration year, first as text taken from the MM/DD/YYYY date
dat3[, reg_year := format(
  as.POSIXct(strptime(registration8, format = "%m/%d/%Y", tz = "")),
  format = "%Y"
)]

table(dat3$reg_year)


# Years registered: 2017 - registration year
dat3[, reg_year := as.numeric(reg_year)]
table(dat3$reg_year)

dat3[, reg_inyears := 2017 - reg_year]
table(dat3$reg_inyears)



#====================================#
# Table 1: Florida Registered Voters #
#====================================#
prop.table(table(voterfile$gender))
prop.table(table(voterfile$race1))
prop.table(table(voterfile$pid))
prop.table(table(voterfile$age1))
prop.table(table(voterfile$vote16))
summary(voterfile$reg_inyears)


#======================#
# Table 1: Web + Phone #
#======================#
prop.table(table(dat3$gender))
prop.table(table(dat3$race1))
prop.table(table(dat3$pid))
prop.table(table(dat3$age1))
prop.table(table(dat3$gen2016))
summary(dat3$reg_inyears)


# The subsets below use data.table's `DT[condition]` row filter, in which
# column names are evaluated inside the table. Convert the voter file BEFORE
# the first such subset; on a plain data.frame `voterfile[nophone == 0 & ...]`
# would look for `nophone` in the workspace and fail.
voterfile <- data.table(voterfile)


#=====================#
# Table 1: Phone Only #
#=====================#
# Keep only those with a phone and no email
phone_only <- voterfile[nophone == 0 & noemail == 1]

prop.table(table(phone_only$gender))
prop.table(table(phone_only$race1))
prop.table(table(phone_only$pid))
prop.table(table(phone_only$age1))
prop.table(table(phone_only$vote16))
summary(phone_only$reg_inyears)


#=====================#
# Table 1: Email Only #
#=====================#
# Keep only those with an email address and no phone
email_only <- voterfile[nophone == 1 & noemail == 0]

prop.table(table(email_only$gender))
prop.table(table(email_only$race1))
prop.table(table(email_only$pid))
prop.table(table(email_only$age1))
prop.table(table(email_only$vote16))
summary(email_only$reg_inyears)


#=====================#
# Table 1: No Contact #
#=====================#
# Keep only those with no contact. Inside the row filter `nocontact` refers
# to the voter-file column; the result is then stored under the same name.
nocontact <- voterfile[nocontact == 1]

prop.table(table(nocontact$gender))
prop.table(table(nocontact$race1))
prop.table(table(nocontact$pid))
prop.table(table(nocontact$age1))
prop.table(table(nocontact$vote16))
summary(nocontact$reg_inyears)


#=============================================#
# Merge Attempts file with the Responses file #
#=============================================#

# The file merged by voter ID is provided ready-made.
load("dta_responses_attempts.Rdata")

# Web attempts: 1 when V8 and V9 are equal, 2 when they differ,
# NA when the two cannot be compared.
dta_responses_attempts$nr_attempts <- with(
  dta_responses_attempts,
  dplyr::case_when(V8 == V9 ~ 1, V8 != V9 ~ 2)
)


# Combine phone and web reminder attempts in one variable:
# rows with a recorded phone attempt count take that count instead.
index1 <- which(!is.na(dta_responses_attempts$numberofattempts))

dta_responses_attempts$nr_attempts <- replace(
  dta_responses_attempts$nr_attempts,
  index1,
  dta_responses_attempts$numberofattempts[index1]
)


#====================================================#
# Table 2: Weighted vs. Unweighted Sample Comparison #
#====================================================#

# Gender: 0 = female, 1 = male
table(dta_responses_attempts$sex)


# Race: 1 = white, 2 = black, 3 = hispanic, 4 = other/unknown
dta_responses_attempts$race1 <- with(dta_responses_attempts, dplyr::case_when(
  race == 5 ~ 1,
  race == 3 ~ 2,
  race == 4 ~ 3,
  !is.na(race) ~ 4
))
table(dta_responses_attempts$race1)


# PID: 1 = Democrat, 2 = Republican, 3 = NPA, 4 = Other/Independent
dta_responses_attempts$pid <- with(dta_responses_attempts, dplyr::case_when(
  partyreg == "DEM" ~ 1,
  partyreg == "REP" ~ 2,
  partyreg == "NPA" ~ 3,
  !is.na(partyreg) ~ 4
))
table(dta_responses_attempts$pid)


# Age: 1 = 18-29, 2 = 30-44, 3 = 45-59, 4 = 60+ (under 18 is NA)
dta_responses_attempts$age1 <- as.numeric(cut(
  dta_responses_attempts$age,
  breaks = c(18, 30, 45, 60, Inf),
  right = FALSE,
  labels = FALSE
))
table(dta_responses_attempts$age1)


# Voted in 2016: 1 = yes; 0 = no
table(dta_responses_attempts$gen2016)


# Registration year, first as text taken from the MM/DD/YYYY date
dta_responses_attempts$reg_year <- local({
  reg_date <- strptime(dta_responses_attempts$registration8,
                       format = "%m/%d/%Y", tz = "")
  format(as.POSIXct(reg_date), format = "%Y")
})

table(dta_responses_attempts$reg_year)

# Years registered, counted back from 2017
dta_responses_attempts$reg_year <- as.numeric(dta_responses_attempts$reg_year)
table(dta_responses_attempts$reg_year)

dta_responses_attempts$reg_inyears <- 2017 - dta_responses_attempts$reg_year
table(dta_responses_attempts$reg_inyears)


#=====================================#
# Table 2: All Respondents Unweighted #
#=====================================#
prop.table(table(dta_responses_attempts$sex))
prop.table(table(dta_responses_attempts$race1))
prop.table(table(dta_responses_attempts$pid))
prop.table(table(dta_responses_attempts$age1))
prop.table(table(dta_responses_attempts$gen2016))
summary(dta_responses_attempts$reg_inyears)


#===================================#
# Table 2: All Respondents Weighted #
#===================================#
# Weighted shares use the raking weight `rakeweightagrped`.
wpct(dta_responses_attempts$sex, weight = dta_responses_attempts$rakeweightagrped, na.rm = TRUE)
wpct(dta_responses_attempts$race1, weight = dta_responses_attempts$rakeweightagrped, na.rm = TRUE)
wpct(dta_responses_attempts$pid, weight = dta_responses_attempts$rakeweightagrped, na.rm = TRUE)
wpct(dta_responses_attempts$age1, weight = dta_responses_attempts$rakeweightagrped, na.rm = TRUE)
wpct(dta_responses_attempts$gen2016, weight = dta_responses_attempts$rakeweightagrped, na.rm = TRUE)
# NOTE(review): this summarises the weighted share of each distinct
# registration length, not the registration lengths themselves - confirm
# this is the intended statistic.
summary(wpct(dta_responses_attempts$reg_inyears, weight = dta_responses_attempts$rakeweightagrped, na.rm = TRUE))
# stats::weighted.mean() takes its weights in the argument `w`. An argument
# called `weight` is swallowed by `...` and ignored, which silently returns
# the UNWEIGHTED mean, so the weights are passed as `w` here.
weighted.mean(dta_responses_attempts$reg_inyears, w = dta_responses_attempts$rakeweightagrped, na.rm = TRUE)


#=========================================#
# Table 2: Respondents By Mode Unweighted #
#=========================================#
# `internet` marks the response mode: 1 = web, 0 = phone.
crosstab(dta_responses_attempts$sex, dta_responses_attempts$internet, weight = NULL, chisq = TRUE, prop.c = TRUE)
crosstab(dta_responses_attempts$race1, dta_responses_attempts$internet, weight = NULL, chisq = TRUE, prop.c = TRUE)
crosstab(dta_responses_attempts$pid, dta_responses_attempts$internet, weight = NULL, chisq = TRUE, prop.c = TRUE)
crosstab(dta_responses_attempts$age1, dta_responses_attempts$internet, weight = NULL, chisq = TRUE, prop.c = TRUE)
crosstab(dta_responses_attempts$gen2016, dta_responses_attempts$internet, weight = NULL, chisq = TRUE, prop.c = TRUE)
summary(dta_responses_attempts$reg_inyears[dta_responses_attempts$internet == 1]) # web
summary(dta_responses_attempts$reg_inyears[dta_responses_attempts$internet == 0]) # phone


#=======================================#
# Table 2: Respondents By Mode Weighted #
#=======================================#
crosstab(dta_responses_attempts$sex, dta_responses_attempts$internet, weight = dta_responses_attempts$rakeweightagrped,
         chisq = TRUE, prop.c = TRUE)
crosstab(dta_responses_attempts$race1, dta_responses_attempts$internet, weight = dta_responses_attempts$rakeweightagrped,
         chisq = TRUE, prop.c = TRUE)
crosstab(dta_responses_attempts$pid, dta_responses_attempts$internet, weight = dta_responses_attempts$rakeweightagrped,
         chisq = TRUE, prop.c = TRUE)
crosstab(dta_responses_attempts$age1, dta_responses_attempts$internet, weight = dta_responses_attempts$rakeweightagrped,
         chisq = TRUE, prop.c = TRUE)
crosstab(dta_responses_attempts$gen2016, dta_responses_attempts$internet,
         weight = dta_responses_attempts$rakeweightagrped,  chisq = TRUE, prop.c = TRUE)

# Weighted mean registration length by mode (weights passed as `w`, see above).
# web
weighted.mean(dta_responses_attempts$reg_inyears[dta_responses_attempts$internet == 1],
              w = dta_responses_attempts$rakeweightagrped[dta_responses_attempts$internet == 1],
              na.rm = TRUE)

# phone
weighted.mean(dta_responses_attempts$reg_inyears[dta_responses_attempts$internet == 0],
              w = dta_responses_attempts$rakeweightagrped[dta_responses_attempts$internet == 0],
              na.rm = TRUE)


#================================================#
# Table 3:                                       #
# Attempts: respondents on first attempt         #
#================================================#

# Row masks shared by the tabulations below (NA where the attempt count is NA)
first_attempt <- dta_responses_attempts$nr_attempts == 1
later_attempt <- dta_responses_attempts$nr_attempts > 1

# Responded on the 1st attempt on the phone and web
prop.table(table(dta_responses_attempts$sex[first_attempt]))
prop.table(table(dta_responses_attempts$race1[first_attempt]))
prop.table(table(dta_responses_attempts$pid[first_attempt]))
prop.table(table(dta_responses_attempts$age1[first_attempt]))
prop.table(table(dta_responses_attempts$gen2016[first_attempt]))
summary(dta_responses_attempts$reg_inyears[first_attempt])


# Responded on a subsequent attempt on the phone and web
prop.table(table(dta_responses_attempts$sex[later_attempt]))
prop.table(table(dta_responses_attempts$race1[later_attempt]))
prop.table(table(dta_responses_attempts$pid[later_attempt]))
prop.table(table(dta_responses_attempts$age1[later_attempt]))
prop.table(table(dta_responses_attempts$gen2016[later_attempt]))
summary(dta_responses_attempts$reg_inyears[later_attempt])


#=========================================================#
# IDENTIFYING SKIPPING PATTERNS AND MISSING VALUES ON WEB #
#=========================================================#
# Works on a separate copy (`dat5`) so the recodes below do not touch
# `dta_responses_attempts`. Coding used in this section:
#   8   = question was not answered (item missing)
#   999 = temporary marker for a follow-up that was skipped by design;
#         turned back into NA at the end of the section.

dat5 <- dta_responses_attempts
dat5 <- data.table(dat5)

# Start date: keep only the day of the month of V8, as a number
dat5$start <- as.Date(dat5$V8)
dat5$start <- as.numeric(format(dat5$start, "%d"))

# End date: keep only the day of the month of V9, as a number
dat5$end <- as.Date(dat5$V9)
dat5$end <- as.numeric(format(dat5$end, "%d"))


# Recode questionnaire skipping patterns on web as 999, and missing values as 8.
# For each follow-up question: if the gate question equals "1" and the
# follow-up is NA, it is coded 8; if the gate equals "2" and the follow-up is
# NA, it is coded 999.
# Gate Q2 -> follow-up Q3
dat5$Q3[dat5$Q2 == "1" & is.na(dat5$Q3)] <- 8
dat5$Q3[dat5$Q2 == "2" & is.na(dat5$Q3)] <- 999

# Gate Q8 -> follow-ups Q9 and Q10
dat5$Q9[dat5$Q8 == "1" & is.na(dat5$Q9)] <- 8
dat5$Q9[dat5$Q8 == "2" & is.na(dat5$Q9)] <- 999

dat5$Q10[dat5$Q8 == "1" & is.na(dat5$Q10)] <- 8
dat5$Q10[dat5$Q8 == "2" & is.na(dat5$Q10)] <- 999

# Gate Q11 -> follow-ups Q12 and Q13
dat5$Q12[dat5$Q11 == "1" & is.na(dat5$Q12)] <- 8
dat5$Q12[dat5$Q11 == "2" & is.na(dat5$Q12)] <- 999

dat5$Q13[dat5$Q11 == "1" & is.na(dat5$Q13)] <- 8
dat5$Q13[dat5$Q11 == "2" & is.na(dat5$Q13)] <- 999

# Gate Q14 -> follow-up Q15
dat5$Q15[dat5$Q14 == "1" & is.na(dat5$Q15)] <- 8
dat5$Q15[dat5$Q14 == "2" & is.na(dat5$Q15)] <- 999


# Set every remaining NA in columns 2-50 to 8.
# NOTE(review): columns are addressed by position, so this presumably relies
# on the survey items occupying columns 2-50 - confirm against the data file.
dat5[ , 2:50][is.na(dat5[ , 2:50] ) ] <- 8

# Turn the temporary 999 markers back into NA. This is applied to every
# column of dat5, so any cell equal to 999 anywhere becomes NA.
dat5[dat5 == 999] <- NA



#=====================#
# Issue Voting Models #
#=====================#

dta_responses_attempts <- data.table(dta_responses_attempts)


# Race: 1 = white, 2 = black, 3 = hispanic, 4 = other/unknown
dta_responses_attempts[, race1 := dplyr::case_when(
  race == 5 ~ 1,
  race == 3 ~ 2,
  race == 4 ~ 3,
  !is.na(race) ~ 4
)]
table(dta_responses_attempts$race1)


# PID: 1 = Democrat, 2 = Republican, 3 = NPA (all other registrations are NA)
dta_responses_attempts[, pid := dplyr::case_when(
  partyreg == "DEM" ~ 1,
  partyreg == "REP" ~ 2,
  partyreg == "NPA" ~ 3
)]
table(dta_responses_attempts$pid)


# Age: 1 = 18-29, 2 = 30-44, 3 = 45-59, 4 = 60+ (under 18 is NA)
dta_responses_attempts[, age1 := as.numeric(cut(
  age,
  breaks = c(18, 30, 45, 60, Inf),
  right = FALSE,
  labels = FALSE
))]
table(dta_responses_attempts$age1)


# Voted in 2016: 1 = yes; 0 = no
table(dta_responses_attempts$gen2016)


# Registration year, first as text taken from the MM/DD/YYYY date
dta_responses_attempts[, reg_year := format(
  as.POSIXct(strptime(registration8, format = "%m/%d/%Y", tz = "")),
  format = "%Y"
)]

table(dta_responses_attempts$reg_year)


# Years registered, counted back from 2017
dta_responses_attempts[, reg_year := as.numeric(reg_year)]
table(dta_responses_attempts$reg_year)

dta_responses_attempts[, reg_inyears := 2017 - reg_year]
table(dta_responses_attempts$reg_inyears)


# Self-placed ideology: 1 = liberal; 2 = moderate; 3 = conservative
# (Q20 values outside 1-5 are NA)
dta_responses_attempts[, ideo := dplyr::case_when(
  Q20 %in% c(1, 2) ~ 1,
  Q20 == 3 ~ 2,
  Q20 %in% c(4, 5) ~ 3
)]
table(dta_responses_attempts$ideo)


# Perceived ideology of Scott: 1 = liberal; 2 = moderate; 3 = conservative
# (every non-missing Q21 value above 3 is coded 3)
dta_responses_attempts[, ideoscott := dplyr::case_when(
  Q21 <= 2 ~ 1,
  Q21 == 3 ~ 2,
  !is.na(Q21) ~ 3
)]
table(dta_responses_attempts$ideoscott)

# Perceived ideology of Nelson: 1 = liberal; 2 = moderate; 3 = conservative
# (every non-missing Q22 value above 3 is coded 3)
dta_responses_attempts[, ideonelson := dplyr::case_when(
  Q22 <= 2 ~ 1,
  Q22 == 3 ~ 2,
  !is.na(Q22) ~ 3
)]
table(dta_responses_attempts$ideonelson)


# Sex: 0 = female, 1 = male
table(dta_responses_attempts$sex)


# News: 1 = Most + always, 2 = Half of the time, 3 = Some + Never
# Both Q1 == 1 and Q1 == 2 belong in category 1. Testing only Q1 == 2 would
# send Q1 == 1 to NA and drop those respondents.
# NOTE(review): assumes Q1 is coded 1 = always ... 5 = never - confirm
# against the questionnaire.
dta_responses_attempts$news <- with(dta_responses_attempts, ifelse(Q1 == 1 | Q1 == 2, 1,
                                                                   ifelse(Q1 == 3, 2,
                                                                          ifelse(Q1 == 4 | Q1 == 5, 3, NA))))

table(dta_responses_attempts$news)


# Health insurance, own position: 1 = government, 2 = both, 3 = private
# (stored as a factor; every non-missing Q25 value above 3 is coded 3)
dta_responses_attempts[, health_self := as.factor(dplyr::case_when(
  Q25 <= 2 ~ 1,
  Q25 == 3 ~ 2,
  !is.na(Q25) ~ 3
))]
table(dta_responses_attempts$health_self)


# Health insurance, perceived position of Nelson (same coding, from Q26)
dta_responses_attempts[, health_nelson := as.factor(dplyr::case_when(
  Q26 <= 2 ~ 1,
  Q26 == 3 ~ 2,
  !is.na(Q26) ~ 3
))]
table(dta_responses_attempts$health_nelson)


# Health insurance, perceived position of Scott (same coding, from Q27)
dta_responses_attempts[, health_scott := as.factor(dplyr::case_when(
  Q27 <= 2 ~ 1,
  Q27 == 3 ~ 2,
  !is.na(Q27) ~ 3
))]
table(dta_responses_attempts$health_scott)


# Immigration, own position: 1 = increase, 2 = keep same, 3 = decrease
# Answers 1 and 2 swap places, 3 stays 3, anything else is NA.
dta_responses_attempts[, immself := c(2, 1, 3)[match(Q28, c(1, 2, 3))]]
table(dta_responses_attempts$immself)


# Immigration, perceived position of Nelson: same recode, from Q29
dta_responses_attempts[, immnels := c(2, 1, 3)[match(Q29, c(1, 2, 3))]]
table(dta_responses_attempts$immnels)


# Immigration, perceived position of Scott: same recode, from Q30
dta_responses_attempts[, immscott := c(2, 1, 3)[match(Q30, c(1, 2, 3))]]
table(dta_responses_attempts$immscott)


# Confederate monuments: 1 = remove, 2 = museum, 3 = keep
# The Q31 scale is reversed (1 <-> 3); anything outside 1-3 is NA.
dta_responses_attempts[, confederatem := c(3, 2, 1)[match(Q31, c(1, 2, 3))]]
table(dta_responses_attempts$confederatem)


# Vote choice 2016: 1 when Q3 == 2, NA when Q3 == 3 or missing, 0 otherwise
dta_responses_attempts[, trump := ifelse(Q3 == 3, NA, as.numeric(Q3 == 2))]
table(dta_responses_attempts$trump)

# Voted in 2016: 1 when Q2 == 1, 0 otherwise
dta_responses_attempts[, vote16 := as.numeric(Q2 == 1)]
table(dta_responses_attempts$vote16)

# Senate 2018 vote indicator: 1 when Q14 == 1, 0 otherwise
dta_responses_attempts[, vote_sen18 := as.numeric(Q14 == 1)]
table(dta_responses_attempts$vote_sen18)


# Vote for Scott: 1 when Q15 == 2, NA when Q15 is 3, 4 or missing, 0 otherwise
dta_responses_attempts[, votescott := ifelse(Q15 %in% c(3, 4), NA,
                                             as.numeric(Q15 == 2))]
table(dta_responses_attempts$votescott)

# Heard of Scott: 1 when Q8 == 1, 0 otherwise
dta_responses_attempts[, heard_scott := as.numeric(Q8 == 1)]
table(dta_responses_attempts$heard_scott)

# Heard of Nelson: 1 when Q11 == 1, 0 otherwise
dta_responses_attempts[, heard_nelson := as.numeric(Q11 == 1)]
table(dta_responses_attempts$heard_nelson)

# Tax: 0 = the poor would pay a very low percentage of their income in
# federal taxes, 1 = everybody, regardless of income, would pay about the
# same percentage of their income in federal taxes (Q32 == 2).
dta_responses_attempts[, tax_same := as.numeric(Q32 == 2)]
table(dta_responses_attempts$tax_same)


# Issue and ideology advantage measures.

# Numeric value of a coded variable. The health_* columns are factors, and
# as.numeric() on a factor returns the position of the level, not the coded
# value; the two differ as soon as one category is empty. Converting through
# as.character() always yields the coded value.
factor_to_numeric <- function(x) {
  as.numeric(as.character(x))
}

# Relative distance: |Nelson - self| - |Scott - self|. Positive values mean
# Nelson is placed farther from the respondent than Scott.
candidate_advantage <- function(nelson, scott, self) {
  abs(nelson - self) - abs(scott - self)
}

# Min-max rescaling to the 0-1 range, ignoring missing values.
rescale_01 <- function(x) {
  limits <- range(x, na.rm = TRUE)
  (x - limits[1]) / (limits[2] - limits[1])
}

# Numeric copies of the placement variables
dta_responses_attempts$immnels1 <- factor_to_numeric(dta_responses_attempts$immnels)
dta_responses_attempts$immself1 <- factor_to_numeric(dta_responses_attempts$immself)
dta_responses_attempts$immscott1 <- factor_to_numeric(dta_responses_attempts$immscott)
dta_responses_attempts$health_nelson1 <- factor_to_numeric(dta_responses_attempts$health_nelson)
dta_responses_attempts$health_self1 <- factor_to_numeric(dta_responses_attempts$health_self)
dta_responses_attempts$health_scott1 <- factor_to_numeric(dta_responses_attempts$health_scott)


# Combined issue measure: immigration plus health insurance, rescaled to 0-1
dta_responses_attempts$issue_dist <- with(
  dta_responses_attempts,
  rescale_01(candidate_advantage(immnels1, immscott1, immself1) +
               candidate_advantage(health_nelson1, health_scott1, health_self1))
)

table(dta_responses_attempts$issue_dist)

# Ideology advantage measure, rescaled to 0-1
dta_responses_attempts$ideo_dist <- with(
  dta_responses_attempts,
  rescale_01(candidate_advantage(ideonelson, ideoscott, ideo))
)

table(dta_responses_attempts$ideo_dist)


# Immigration issue distance, rescaled to 0-1
dta_responses_attempts$issue_dist_immig <- with(
  dta_responses_attempts,
  rescale_01(candidate_advantage(immnels1, immscott1, immself1))
)

table(dta_responses_attempts$issue_dist_immig)

# Health insurance issue distance, rescaled to 0-1
dta_responses_attempts$issue_dist_health <- with(
  dta_responses_attempts,
  rescale_01(candidate_advantage(health_nelson1, health_scott1, health_self1))
)

table(dta_responses_attempts$issue_dist_health)


# Keep only the variables needed for the models

dta_responses_attempts <- data.table(dta_responses_attempts)

model_vars <- c(
  "pid", "ideo", "ideoscott", "ideonelson", "age1", "sex", "race1", "news",
  "health_self", "health_nelson", "health_scott",
  "immself", "immnels", "immscott", "confederatem",
  "trump", "vote16", "vote_sen18", "votescott",
  "heard_scott", "heard_nelson", "tax_same",
  "issue_dist", "ideo_dist", "county", "internet",
  "issue_dist_immig", "issue_dist_health",
  "rakeweightagrped", "nr_attempts", "reg_inyears"
)
dat_final <- dta_responses_attempts[, model_vars, with = FALSE]


# Party ID as a factor, base category: NPA (level "3")
dat_final[, pid := relevel(as.factor(pid), ref = "3")]

# Ideology as a factor, base category: moderates (level "2")
dat_final[, ideo := relevel(as.factor(ideo), ref = "2")]

# Own health-insurance position as a factor, base category: level "3"
dat_final[, health_self := relevel(as.factor(health_self), ref = "3")]


#==============================================================#
# Issue Voting Models US Senate 2018 with county fixed-effects #
#==============================================================#
# Six models of `votescott`, all fitted with svyglm() and a quasibinomial
# family on the same design object. Each model interacts one predictor with
# the survey mode (`internet`) and adds the same controls: sex, race1, pid,
# ideo, age1 and county.
# NOTE(review): `county` enters the formulas as is; whether it acts as a set
# of county dummies depends on its type in dat_final - confirm it is a
# factor or character column.

# Immigration policy
dat_final <- data.table(dat_final)

# Specify the survey design: nr_attempts is the cluster id and every
# observation gets weight 1. Despite its name, `d_strata` defines clusters,
# not strata.
d_strata <- svydesign(id =~ nr_attempts, weights =~ 1, data = dat_final)

# Cluster st.errors by nr. attempts
m1 <- svyglm(votescott ~ factor(immself) + internet + factor(immself):internet + sex +
               factor(race1) + pid + ideo + factor(age1) + county, design = d_strata, family=quasibinomial())
summary(m1)

# Get std.errors
SE(m1)


# Confederate monuments

# Cluster st.errors by nr. attempts
m2 <- svyglm(votescott ~ factor(confederatem) + internet + factor(confederatem):internet + sex +
               factor(race1) + pid + ideo + factor(age1) + county, design = d_strata, family=quasibinomial())
summary(m2)

# Get std.errors
SE(m2)



# Health policy

m3 <- svyglm(votescott ~ factor(health_self) + internet + factor(health_self):internet + sex +
               factor(race1) + pid + ideo + factor(age1) + county, design = d_strata, family=quasibinomial())
summary(m3)

# Get std.errors
SE(m3)



# Tax policy (tax_same enters as a 0/1 variable, not wrapped in factor())

m4 <- svyglm(votescott ~ tax_same + internet + tax_same:internet + sex +
               factor(race1) + pid + ideo + factor(age1) + county, design = d_strata, family=quasibinomial())
summary(m4)

# Get std.errors
SE(m4)



# PARTY ID, NPA base category
# `pid` is written twice in the formula (predictor and control).

m5 <- svyglm(votescott ~ pid + internet + pid:internet + sex +
               factor(race1) + pid + ideo + factor(age1) + county, design = d_strata, family=quasibinomial())
summary(m5)

# Get std.errors
SE(m5)


# Ideology, Moderates base category
# `ideo` is written twice in the formula (predictor and control).

m6 <- svyglm(votescott ~ ideo + internet + ideo:internet + sex +
               factor(race1) + pid + ideo + factor(age1) + county, design = d_strata, family=quasibinomial())
summary(m6)

# Get std.errors
SE(m6)


#===============================================================#
# Issue Voting, General Election 2016 with county fixed effects #
#===============================================================#
# Same six specifications as the Senate models, with `trump` as the outcome.
# All models reuse the design object `d_strata` (cluster id = nr_attempts,
# weights = 1) and a quasibinomial family.

# Immigration
m1t <- svyglm(trump ~ factor(immself) + internet + factor(immself):internet + sex + 
                factor(race1) + pid + ideo + factor(age1) + county, 
              design = d_strata, family=quasibinomial())
summary(m1t)

# Get std.errors
SE(m1t)


# Confederate monuments
m2t <- svyglm(trump ~ factor(confederatem) + internet + factor(confederatem):internet + 
                sex + factor(race1) + pid + ideo + factor(age1) + county, 
              design = d_strata, family=quasibinomial())
summary(m2t)

# Get std.errors
SE(m2t)


# Health insurance
m3t <- svyglm(trump ~ factor(health_self) + internet + factor(health_self):internet + sex + factor(race1) + 
                pid + ideo + factor(age1) + county, design = d_strata, family=quasibinomial())
summary(m3t)

# Get std.errors
SE(m3t)


# Tax reform (here tax_same is wrapped in factor(), unlike in model m4)
m4t <- svyglm(trump ~ factor(tax_same) + internet + factor(tax_same):internet + sex + factor(race1) + pid + 
                ideo + factor(age1) + county, design = d_strata, family=quasibinomial())
summary(m4t)

# Get std.errors
SE(m4t)


# Party registration, NPA base category
m5t <- svyglm(trump ~ pid + internet + pid:internet + sex + factor(race1) + ideo + factor(age1) + 
                county, design = d_strata, family=quasibinomial())
summary(m5t)

# Get std.errors
SE(m5t)


# Ideology, Moderates base category
# `ideo` is written twice in the formula (predictor and control).
m6t <- svyglm(trump ~ ideo + internet + ideo:internet + sex + factor(race1) + pid + ideo + 
                factor(age1) + county, design = d_strata, family=quasibinomial())
summary(m6t)

# Get std.errors
SE(m6t)


#=====================#
# Model Output/ LaTeX #
#=====================#

library(texreg)
library(xtable)

# Standard errors (column 2) and p-values (column 4) are taken from the
# svyglm summaries. `override.se` and `override.pvalues` are two separate
# arguments, each a list with one vector per model. If the p-value list is
# nested inside the standard-error list, that list has seven entries for six
# models and the p-values never reach texreg().
# NOTE(review): `model.names` is kept as written; current texreg versions
# name this argument `custom.model.names` - confirm which one is intended.
texreg(list(m1, m2, m3, m4, m5, m6),
       caption = "Issue Voting in the 2018 Florida Elections",
       digits = 3,
       dcolumn = FALSE,
       model.names = c("m1", "m2", "m3", "m4", "m5", "m6"),
       override.se = list(summary(m1)$coef[, 2],
                          summary(m2)$coef[, 2],
                          summary(m3)$coef[, 2],
                          summary(m4)$coef[, 2],
                          summary(m5)$coef[, 2],
                          summary(m6)$coef[, 2]),
       override.pvalues = list(summary(m1)$coef[, 4],
                               summary(m2)$coef[, 4],
                               summary(m3)$coef[, 4],
                               summary(m4)$coef[, 4],
                               summary(m5)$coef[, 4],
                               summary(m6)$coef[, 4]))


texreg(list(m1t, m2t, m3t, m4t, m5t, m6t),
       caption = "Issue Voting: General Election Turnout",
       digits = 3,
       dcolumn = FALSE,
       model.names = c("m1t", "m2t", "m3t", "m4t", "m5t", "m6t"),
       override.se = list(summary(m1t)$coef[, 2],
                          summary(m2t)$coef[, 2],
                          summary(m3t)$coef[, 2],
                          summary(m4t)$coef[, 2],
                          summary(m5t)$coef[, 2],
                          summary(m6t)$coef[, 2]),
       override.pvalues = list(summary(m1t)$coef[, 4],
                               summary(m2t)$coef[, 4],
                               summary(m3t)$coef[, 4],
                               summary(m4t)$coef[, 4],
                               summary(m5t)$coef[, 4],
                               summary(m6t)$coef[, 4]))




#---------------------------------------------#
# Table 3: Response Differences Between Modes #
#---------------------------------------------#

load("dat5.RData")

# Drop objects that are not needed for Table 3
rm(list = c("dat2", "dat3", "dat33", "dat44", "dat4",
            "m1m_kn", "m2m_kn", "m3m_kn", "m4m_kn", "web_missing"))

# Ideology items are collapsed to three categories:
#   1 = liberal (answers <= 2), 2 = moderate (3), 3 = conservative (4 or 5),
#   8 = any other non-missing answer; NA stays NA

# Own ideology (Q20)
dat5$ideo_self <- ifelse(dat5$Q20 <= 2, 1,
                  ifelse(dat5$Q20 == 3, 2,
                  ifelse(dat5$Q20 %in% c(4, 5), 3, 8)))
table(dat5$ideo_self)

# Ideology Scott (Q21)
dat5$ideo_scott <- ifelse(dat5$Q21 <= 2, 1,
                   ifelse(dat5$Q21 == 3, 2,
                   ifelse(dat5$Q21 %in% c(4, 5), 3, 8)))
table(dat5$ideo_scott)


# Ideology Nelson (Q22)
dat5$ideo_nelson <- ifelse(dat5$Q22 <= 2, 1,
                    ifelse(dat5$Q22 == 3, 2,
                    ifelse(dat5$Q22 %in% c(4, 5), 3, 8)))
table(dat5$ideo_nelson)


# Immigration: 1 = increase, 2 = keep same, 3 = decrease, 8 = idk
# (source codes 1 and 2 are swapped; 3 is kept; anything else becomes 8)

# Own position (Q28)
dat5$immigration_self <- ifelse(dat5$Q28 == 2, 1,
                         ifelse(dat5$Q28 == 1, 2,
                         ifelse(dat5$Q28 == 3, 3, 8)))
table(dat5$immigration_self)

# Immigration Nelson (Q29)
dat5$immigration_nelson <- ifelse(dat5$Q29 == 2, 1,
                           ifelse(dat5$Q29 == 1, 2,
                           ifelse(dat5$Q29 == 3, 3, 8)))
table(dat5$immigration_nelson)

# Immigration Scott (Q30)
dat5$immigration_scott <- ifelse(dat5$Q30 == 2, 1,
                          ifelse(dat5$Q30 == 1, 2,
                          ifelse(dat5$Q30 == 3, 3, 8)))
table(dat5$immigration_scott)


# Confederate monuments: 1 = remove, 2 = museum, 3 = keep, 8 = idk
# (source codes 1 and 3 are swapped; anything other than 1-3 becomes 8)
dat5$confederatem_self <- ifelse(dat5$Q31 == 3, 1,
                          ifelse(dat5$Q31 == 2, 2,
                          ifelse(dat5$Q31 == 1, 3, 8)))
table(dat5$confederatem_self)


# Health items: 1 = government (answers <= 2), 2 = both (3),
# 3 = private (4 or 5), 8 = idk

# Own position (Q25)
dat5$health_self <- ifelse(dat5$Q25 <= 2, 1,
                    ifelse(dat5$Q25 == 3, 2,
                    ifelse(dat5$Q25 %in% c(4, 5), 3, 8)))
table(dat5$health_self)

# Health Nelson (Q26)
dat5$health_nelson <- ifelse(dat5$Q26 <= 2, 1,
                      ifelse(dat5$Q26 == 3, 2,
                      ifelse(dat5$Q26 %in% c(4, 5), 3, 8)))
table(dat5$health_nelson)


# Health Scott (Q27)
dat5$health_scott <- ifelse(dat5$Q27 <= 2, 1,
                     ifelse(dat5$Q27 == 3, 2,
                     ifelse(dat5$Q27 %in% c(4, 5), 3, 8)))
table(dat5$health_scott)


# Vote Scott (Q15): 0 = no, 1 = yes, 8 = idk
# Answers 3 and 4 are set to missing before recoding
dat5$votescott <- dat5$Q15
dat5$votescott[dat5$votescott %in% c(3, 4)] <- NA

dat5$votescott <- ifelse(dat5$votescott == 1, 0,
                  ifelse(dat5$votescott == 2, 1, 8))
table(dat5$votescott)


# Vote Trump (Q3): 0 = no, 1 = yes, 8 = idk
# Answer 3 is set to missing before recoding
dat5$trump <- dat5$Q3
dat5$trump[dat5$trump %in% 3] <- NA

dat5$trump <- ifelse(dat5$trump == 1, 0,
              ifelse(dat5$trump == 2, 1, 8))

table(dat5$trump)


# pid3: 1 = Dem, 2 = Rep, 3 = NPA

# Perceived party of each candidate: 1 = Dem, 2 = Rep, 4 = NPA, 8 = idk
# Code 3 (other party) is dropped by setting it to NA

# Scott (Q9)
dat5$pid_scott <- replace(dat5$Q9, which(dat5$Q9 == 3), NA)

# Nelson (Q12)
dat5$pid_nelson <- replace(dat5$Q12, which(dat5$Q12 == 3), NA)


# Table results: weighted column percentages of each item by survey mode
# (internet), using the raking weight rakeweightagrped

# Ideology: 1 = liberal, 2 = moderate, 3 = conservative
crosstab(dat5$ideo_self, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)
crosstab(dat5$ideo_scott, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)
crosstab(dat5$ideo_nelson, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)

# Perceived party of Scott/Nelson: 1 = Dem, 2 = Rep, 4 = NPA, 8 = idk
# (3 = other party was set to NA when these variables were built)
crosstab(dat5$pid_scott, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)
crosstab(dat5$pid_nelson, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)

# Immigration issue: 1 = increase, 2 = keep same, 3 = decrease
crosstab(dat5$immigration_self, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)
crosstab(dat5$immigration_scott, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)
crosstab(dat5$immigration_nelson, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)

# Health insurance: 1 = government, 2 = both, 3 = private, 8 = idk
crosstab(dat5$health_self, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)
crosstab(dat5$health_scott, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)
crosstab(dat5$health_nelson, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)

# Confederate statues: 1 = remove, 2 = museums, 3 = keep
crosstab(dat5$confederatem_self, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)

# Tax: 1 = poor pay less, 2 = pay the same
crosstab(dat5$Q32, dat5$internet,
         weight = dat5$rakeweightagrped, prop.c = TRUE)



#=================================================================#
# Appendix Replication Tables                                     #
# Issue Voting Models US Senate 2018 without County Fixed-Effects #
#=================================================================#

# The models below mirror the issue-voting specifications for `votescott`
# but leave out the `county` term.

# Immigration policy
# Cluster st.errors by nr. attempts
m1_nfe <- svyglm(
  votescott ~ factor(immself) + internet + factor(immself):internet +
    sex + factor(race1) + pid + ideo + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m1_nfe)

# Standard errors of the coefficients
SE(m1_nfe)


# Confederate monuments
# Cluster st.errors by nr. attempts
m2_nfe <- svyglm(
  votescott ~ factor(confederatem) + internet + factor(confederatem):internet +
    sex + factor(race1) + pid + ideo + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m2_nfe)

# Standard errors of the coefficients
SE(m2_nfe)


# Health policy
m3_nfe <- svyglm(
  votescott ~ factor(health_self) + internet + factor(health_self):internet +
    sex + factor(race1) + pid + ideo + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m3_nfe)

# Standard errors of the coefficients
SE(m3_nfe)


# Tax policy
m4_nfe <- svyglm(
  votescott ~ tax_same + internet + tax_same:internet +
    sex + factor(race1) + pid + ideo + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m4_nfe)

# Standard errors of the coefficients
SE(m4_nfe)


# PARTY ID, NPA base category
# `pid` used to appear twice on the right-hand side; the redundant copy is
# dropped (a repeated formula term is only kept once, so the fit is unchanged).
m5_nfe <- svyglm(
  votescott ~ pid + internet + pid:internet +
    sex + factor(race1) + ideo + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m5_nfe)

# Get std.errors
SE(m5_nfe)


# Ideology, Moderates base category
# `ideo` used to appear twice on the right-hand side; the redundant copy is
# dropped for the same reason.
m6_nfe <- svyglm(
  votescott ~ ideo + internet + ideo:internet +
    sex + factor(race1) + pid + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m6_nfe)

# Get std.errors
SE(m6_nfe)


#==================================================================#
# Issue Voting, General Election 2016 without county fixed effects #
#==================================================================#

# The models below use `trump` as the outcome and contain no `county` term.

# Immigration
m1t_nfe <- svyglm(
  trump ~ factor(immself) + internet + factor(immself):internet +
    sex + factor(race1) + pid + ideo + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m1t_nfe)

# Standard errors of the coefficients
SE(m1t_nfe)


# Confederate monuments
m2t_nfe <- svyglm(
  trump ~ factor(confederatem) + internet + factor(confederatem):internet +
    sex + factor(race1) + pid + ideo + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m2t_nfe)

# Standard errors of the coefficients
SE(m2t_nfe)


# Health insurance
m3t_nfe <- svyglm(
  trump ~ factor(health_self) + internet + factor(health_self):internet +
    sex + factor(race1) + pid + ideo + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m3t_nfe)

# Standard errors of the coefficients
SE(m3t_nfe)


# Tax reform
m4t_nfe <- svyglm(
  trump ~ factor(tax_same) + internet + factor(tax_same):internet +
    sex + factor(race1) + pid + ideo + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m4t_nfe)

# Standard errors of the coefficients
SE(m4t_nfe)


# Party registration
m5t_nfe <- svyglm(
  trump ~ pid + internet + pid:internet +
    sex + factor(race1) + ideo + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m5t_nfe)

# Standard errors of the coefficients
SE(m5t_nfe)


# Ideology, Moderates base category
# `ideo` used to appear twice on the right-hand side; the redundant copy is
# dropped (a repeated formula term is only kept once, so the fit is unchanged).
m6t_nfe <- svyglm(
  trump ~ ideo + internet + ideo:internet +
    sex + factor(race1) + pid + factor(age1),
  design = d_strata,
  family = quasibinomial()
)
summary(m6t_nfe)

# Get std.errors
SE(m6t_nfe)


#=====================#
# Model Output/ LaTeX #
#=====================#

library(texreg)
library(xtable)

# LaTeX table for models m1_nfe-m6_nfe.
# Standard errors (column 2) and p-values (column 4) are taken from each
# model's summary() coefficient table and handed to texreg as overrides.
# Fixes:
#   * override.pval was accidentally nested inside the override.se list
#     (misplaced closing parenthesis), so it was never passed to texreg().
#   * column headers are set through custom.model.names, the texreg argument
#     for model labels.
texreg(list(m1_nfe, m2_nfe, m3_nfe, m4_nfe, m5_nfe, m6_nfe),
       caption = "Issue Voting in the 2018 Florida Elections",
       digits = 3,
       dcolumn = FALSE,
       custom.model.names = c("m1_nfe", "m2_nfe", "m3_nfe",
                              "m4_nfe", "m5_nfe", "m6_nfe"),
       override.se = list(summary(m1_nfe)$coefficients[, 2],
                          summary(m2_nfe)$coefficients[, 2],
                          summary(m3_nfe)$coefficients[, 2],
                          summary(m4_nfe)$coefficients[, 2],
                          summary(m5_nfe)$coefficients[, 2],
                          summary(m6_nfe)$coefficients[, 2]),
       override.pval = list(summary(m1_nfe)$coefficients[, 4],
                            summary(m2_nfe)$coefficients[, 4],
                            summary(m3_nfe)$coefficients[, 4],
                            summary(m4_nfe)$coefficients[, 4],
                            summary(m5_nfe)$coefficients[, 4],
                            summary(m6_nfe)$coefficients[, 4]))


# LaTeX table for models m1t_nfe-m6t_nfe.
# Standard errors (column 2) and p-values (column 4) are taken from each
# model's summary() coefficient table and handed to texreg as overrides.
# Fixes:
#   * override.pval was accidentally nested inside the override.se list
#     (misplaced closing parenthesis), so it was never passed to texreg().
#   * column headers are set through custom.model.names, the texreg argument
#     for model labels.
texreg(list(m1t_nfe, m2t_nfe, m3t_nfe, m4t_nfe, m5t_nfe, m6t_nfe),
       caption = "Issue Voting: General Election Turnout",
       digits = 3,
       dcolumn = FALSE,
       custom.model.names = c("m1t_nfe", "m2t_nfe", "m3t_nfe",
                              "m4t_nfe", "m5t_nfe", "m6t_nfe"),
       override.se = list(summary(m1t_nfe)$coefficients[, 2],
                          summary(m2t_nfe)$coefficients[, 2],
                          summary(m3t_nfe)$coefficients[, 2],
                          summary(m4t_nfe)$coefficients[, 2],
                          summary(m5t_nfe)$coefficients[, 2],
                          summary(m6t_nfe)$coefficients[, 2]),
       override.pval = list(summary(m1t_nfe)$coefficients[, 4],
                            summary(m2t_nfe)$coefficients[, 4],
                            summary(m3t_nfe)$coefficients[, 4],
                            summary(m4t_nfe)$coefficients[, 4],
                            summary(m5t_nfe)$coefficients[, 4],
                            summary(m6t_nfe)$coefficients[, 4]))



#==============================================================================#
# Unweighted Descriptive Statistics and Chi-square with Clustered by Reminders #
#==============================================================================#

# For each characteristic: an unweighted crosstab by survey mode (internet)
# with column percentages and a chi-square test, followed by the chi-square
# test computed on the d_strata survey design.

# Sex
crosstab(dat_final$sex, dat_final$internet, weight = NULL,
         prop.c = TRUE, chisq = TRUE)

tbl_s <- svytable(~ sex + internet, d_strata)
summary(tbl_s, statistic = "Chisq")


# Race
crosstab(dat_final$race1, dat_final$internet, weight = NULL,
         prop.c = TRUE, chisq = TRUE)

tbl_r <- svytable(~ race1 + internet, d_strata)
summary(tbl_r, statistic = "Chisq")


# Party ID
crosstab(dat_final$pid, dat_final$internet, weight = NULL,
         prop.c = TRUE, chisq = TRUE)

tbl_pid <- svytable(~ pid + internet, d_strata)
summary(tbl_pid, statistic = "Chisq")


# Age
crosstab(dat_final$age1, dat_final$internet, weight = NULL,
         prop.c = TRUE, chisq = TRUE)

tbl_age <- svytable(~ age1 + internet, d_strata)
summary(tbl_age, statistic = "Chisq")


# Voted 2016
crosstab(dat_final$vote16, dat_final$internet, weight = NULL,
         prop.c = TRUE, chisq = TRUE)

tbl_vote <- svytable(~ vote16 + internet, d_strata)
summary(tbl_vote, statistic = "Chisq")


# Registration length (reg_inyears): summaries for internet == 1 and
# internet == 0, then the design-based chi-square test
summary(dat_final$reg_inyears[dat_final$internet == 1])
summary(dat_final$reg_inyears[dat_final$internet == 0])

tbl_reg <- svytable(~ reg_inyears + internet, d_strata)
summary(tbl_reg, statistic = "Chisq")




#==========================================================================#
# Weighted Descriptive Statistics and Chi-square with Clustered St. errors #
#==========================================================================#

# Keep only the rows that have a raking weight (rakeweightagrped not NA)
dat_final1 <- dat_final[!is.na(dat_final$rakeweightagrped), ]


# Survey design clustered on nr_attempts and weighted by rakeweightagrped
d_strata_weighted <- svydesign(id = ~nr_attempts,
                               weights = ~rakeweightagrped,
                               data = dat_final1)


# For each characteristic: a weighted crosstab by survey mode (internet) with
# column percentages, followed by the chi-square test computed on the
# weighted design d_strata_weighted.

# Sex
crosstab(dat_final1$sex, dat_final1$internet,
         weight = dat_final1$rakeweightagrped, prop.c = TRUE)

tbl_sw <- svytable(~ sex + internet, d_strata_weighted)
summary(tbl_sw, statistic = "Chisq")


# Race
crosstab(dat_final1$race1, dat_final1$internet,
         weight = dat_final1$rakeweightagrped, prop.c = TRUE)

tbl_rw <- svytable(~ race1 + internet, d_strata_weighted)
summary(tbl_rw, statistic = "Chisq")


# Party ID
crosstab(dat_final1$pid, dat_final1$internet,
         weight = dat_final1$rakeweightagrped, prop.c = TRUE)

tbl_pidw <- svytable(~ pid + internet, d_strata_weighted)
summary(tbl_pidw, statistic = "Chisq")


# Age
crosstab(dat_final1$age1, dat_final1$internet,
         weight = dat_final1$rakeweightagrped, prop.c = TRUE)

tbl_agew <- svytable(~ age1 + internet, d_strata_weighted)
summary(tbl_agew, statistic = "Chisq")


# Voted 2016
crosstab(dat_final1$vote16, dat_final1$internet,
         weight = dat_final1$rakeweightagrped, prop.c = TRUE)

tbl_votew <- svytable(~ vote16 + internet, d_strata_weighted)
summary(tbl_votew, statistic = "Chisq")


# Registration length (reg_inyears): summaries for internet == 1 and
# internet == 0, then the design-based chi-square test
summary(dat_final1$reg_inyears[dat_final1$internet == 1])
summary(dat_final1$reg_inyears[dat_final1$internet == 0])

tbl_regw <- svytable(~ reg_inyears + internet, d_strata_weighted)
summary(tbl_regw, statistic = "Chisq")



#=================================#
# Heat map for the Sampling Frame #
#=================================#

library(mapproj)
library(ggmap)
library(ggplot2)
library(plotly)
library(data.table)
library(tidyr)
library(dplyr)
library(plyr)

# Sampling frame the survey sample was drawn from
dat3 <- read.dta13("Population from which sample was drawn.dta")

# Counties in USA
counties <- map_data("county")

# Keep only the counties of Florida
fl_county <- subset(counties, region == "florida")
fl_df <- subset(counties, region == "florida")

# Rename the sixth column of the map data so it can serve as the join key
names(fl_county)[6] <- "county_name"

fl_base <- ggplot(data = fl_df, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(color = "black", fill = "gray")

fl_base + theme_nothing() +
  geom_polygon(data = fl_county, fill = NA, color = "black") +
  geom_polygon(color = "black", fill = NA)  # get the state border back on top

# Number of sampling-frame records per county (stored on every row of dat3)
dat3 <- setDT(dat3)
dat3[, nr_respondents := .N, .(county)]

# County name
countyname <- read.csv("fl_county_name.csv")

# Merge the dataset and county name
dat3_countyname <- merge(dat3, countyname, by = "county")

# Collapse to one row per county before attaching the counts to the polygons.
# The previous code merge()d the polygon vertices with every record in the
# sampling frame: a many-to-many join that is very large, and merge() re-sorts
# the rows so the polygon vertices can end up out of drawing order.
# inner_join() keeps the rows of fl_county in their original order, as the
# other heat-map sections of this script already rely on.
county_counts <- unique(dat3_countyname[, c("county_name", "nr_respondents")])

flcopa <- inner_join(fl_county, county_counts, by = "county_name")
flcopa <- flcopa[!duplicated(flcopa$order), ]


# Theme that removes axes, panel border and grid lines from the map
ditch_the_axes <- theme(
  axis.text = element_blank(),
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  panel.border = element_blank(),
  panel.grid = element_blank(),
  axis.title = element_blank()
)
library(reshape2)

# County heat map of the sampling frame: fill = number of records per county,
# white (low) to black (high), with a horizontal colour bar under the map.
# `comma` is exported by the scales package, which this script never attaches,
# so it is referenced as scales::comma (scales is installed with ggplot2).
map_samplingframe1 <- ggplot(flcopa, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = nr_respondents)) +
  geom_polygon(colour = "black", fill = NA, size = 0.2) +
  theme_bw() +
  ditch_the_axes +
  labs(fill = "Sampling Frame Registrants by County") +
  theme(legend.position = "bottom", legend.justification = "center") +
  scale_fill_continuous(
    high = "black",
    low = "white",
    labels = scales::comma,
    guide = guide_colorbar(
      direction = "horizontal",
      barheight = unit(2, units = "mm"),
      barwidth = unit(50, units = "mm"),
      draw.ulim = FALSE,
      title.position = "top",
      # centre the title and the tick labels
      title.hjust = 0.5,
      label.hjust = 0.5))
map_samplingframe1



#==========================#
# Heat map for Respondents #
#==========================#
load("dat2.Rdata")

# Counties in USA
counties <- map_data("county")

# Keep only the counties of Florida
fl_county <- subset(counties, region == "florida")
fl_df <- subset(counties, region == "florida")

# Rename the sixth column of the map data so it can serve as the join key
names(fl_county)[6] <- "county_name"

fl_base <- ggplot(data = fl_df, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(color = "black", fill = "gray")

fl_base + theme_nothing() +
  geom_polygon(data = fl_county, fill = NA, color = "black") +
  geom_polygon(color = "black", fill = NA)  # get the state border back on top

# County name
countyname <- read.csv("fl_county_name.csv")
dat2 <- data.table(dat2)

# Number of respondents per county
dat2[, nr_respondents := .N, .(county)]

dat2 <- dat2[, c("county", "nr_respondents")]

# Merge the dataset and county name, keeping one row per county.
# Without unique() every polygon vertex is joined to every respondent of its
# county and the duplicates are only thrown away afterwards.
dat2_new <- unique(merge(dat2, countyname, by = "county"))

# inner_join() keeps the polygon vertices of fl_county in drawing order
flcopa <- inner_join(fl_county, dat2_new, by = "county_name")
flcopa <- flcopa[!duplicated(flcopa$order), ]


# Blank out axes, grid lines and the panel border so only the map remains
ditch_the_axes <- theme(
  axis.title = element_blank(),
  axis.text = element_blank(),
  axis.ticks = element_blank(),
  axis.line = element_blank(),
  panel.grid = element_blank(),
  panel.border = element_blank()
)

library(reshape2)

# County heat map of all respondents: fill = number of respondents per county,
# white (low) to black (high), with a horizontal colour bar under the map
map_respondnets <- ggplot(flcopa, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = nr_respondents)) +
  geom_polygon(color = "black", fill = NA, size = 0.2) +
  theme_bw() +
  ditch_the_axes +
  labs(fill = "Respondents by County") +
  theme(legend.position = "bottom", legend.justification = "center") +
  scale_fill_continuous(
    high = "black",
    low = "white",
    # labels = comma,
    guide = guide_colorbar(
      direction = "horizontal",
      barheight = unit(2, "mm"),
      barwidth = unit(50, "mm"),
      draw.ulim = FALSE,
      title.position = "top",
      # centre the title and the tick labels
      title.hjust = 0.5,
      label.hjust = 0.5
    )
  )
map_respondnets



#===================================#
# Heat map for Internet Respondents #
#===================================#
load("dat2.Rdata")

# Counties in USA
counties <- map_data("county")

# Keep only the counties of Florida
fl_county <- subset(counties, region == "florida")
fl_df <- subset(counties, region == "florida")

# Rename the sixth column of the map data so it can serve as the join key
names(fl_county)[6] <- "county_name"

fl_base <- ggplot(data = fl_df, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(color = "black", fill = "gray")


# County name
countyname <- read.csv("fl_county_name.csv")
dat2 <- data.table(dat2)

# Keep only web respondents
dat2 <- dat2[internet == 1]

# Number of respondents per county
dat2[, nr_respondents := .N, .(county)]

dat2 <- dat2[, c("county", "nr_respondents", "internet")]

# Merge the dataset and county name, keeping one row per county.
# Without unique() every polygon vertex is joined to every respondent of its
# county and the duplicates are only thrown away afterwards.
dat2_new <- unique(merge(dat2, countyname, by = "county"))

##****## WARNING: USE INNER_JOIN NOT MERGE!!!!!!! OTHERWISE POLYGONS WON'T MATCH
flcopa <- inner_join(fl_county, dat2_new, by = "county_name")
flcopa <- flcopa[!duplicated(flcopa$order), ]


# Blank out axes, grid lines and the panel border so only the map remains
ditch_the_axes <- theme(
  axis.title = element_blank(),
  axis.text = element_blank(),
  axis.ticks = element_blank(),
  axis.line = element_blank(),
  panel.grid = element_blank(),
  panel.border = element_blank()
)

library(reshape2)

# County heat map of internet respondents. The fill layer uses flcopa (counties
# with respondents); the outline layer uses fl_county, the plot's default data.
map_internet <- ggplot(data = fl_county, aes(x = long, y = lat, group = group)) +
  geom_polygon(data = flcopa, aes(fill = nr_respondents)) +
  geom_polygon(color = "black", fill = NA, size = 0.2) +
  theme_bw() +
  ditch_the_axes +
  labs(fill = "Internet Respondents by County") +
  theme(legend.position = "bottom", legend.justification = "center") +
  scale_fill_continuous(
    high = "black",
    low = "white",
    guide = guide_colorbar(
      direction = "horizontal",
      barheight = unit(2, "mm"),
      barwidth = unit(50, "mm"),
      draw.ulim = FALSE,
      title.position = "top",
      # centre the title and the tick labels
      title.hjust = 0.5,
      label.hjust = 0.5
    )
  )
map_internet



#================================#
# Heat map for Phone Respondents #
#================================#

load("dat2.Rdata")


# Counties in USA
counties <- map_data("county")

# Keep only the counties of Florida
fl_county <- subset(counties, region == "florida")
fl_df <- subset(counties, region == "florida")

# Rename the sixth column of the map data so it can serve as the join key
names(fl_county)[6] <- "county_name"

fl_base <- ggplot(data = fl_df, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(color = "black", fill = "gray")


# County name
countyname <- read.csv("fl_county_name.csv")
dat2 <- data.table(dat2)

# Keep only phone respondents
dat2 <- dat2[internet == 0]

# Number of respondents per county
dat2[, nr_respondents := .N, .(county)]

dat2 <- dat2[, c("county", "nr_respondents", "internet")]

# Merge the dataset and county name, keeping one row per county.
# Without unique() every polygon vertex is joined to every respondent of its
# county and the duplicates are only thrown away afterwards.
dat2_new <- unique(merge(dat2, countyname, by = "county"))

# inner_join() keeps the polygon vertices of fl_county in drawing order
flcopa <- inner_join(fl_county, dat2_new, by = "county_name")
flcopa <- flcopa[!duplicated(flcopa$order), ]


# Blank out axes, grid lines and the panel border so only the map remains
ditch_the_axes <- theme(
  axis.title = element_blank(),
  axis.text = element_blank(),
  axis.ticks = element_blank(),
  axis.line = element_blank(),
  panel.grid = element_blank(),
  panel.border = element_blank()
)

library(reshape2)

# County heat map of phone respondents. The fill layer uses flcopa (counties
# with respondents); the outline layer uses fl_county, the plot's default data.
map_phone <- ggplot(data = fl_county, aes(x = long, y = lat, group = group)) +
  geom_polygon(data = flcopa, aes(fill = nr_respondents)) +
  geom_polygon(color = "black", fill = NA, size = 0.2) +
  theme_bw() +
  ditch_the_axes +
  labs(fill = "Phone Respondents by County") +
  theme(legend.position = "bottom", legend.justification = "center") +
  scale_fill_continuous(
    high = "black",
    low = "white",
    # labels = comma,
    guide = guide_colorbar(
      direction = "horizontal",
      barheight = unit(2, "mm"),
      barwidth = unit(50, "mm"),
      draw.ulim = FALSE,
      title.position = "top",
      # centre the title and the tick labels
      title.hjust = 0.5,
      label.hjust = 0.5
    )
  )
map_phone



#============================================#
# Heat map for Voter File Active Registrants #
#============================================#
voterfile1 <- read.csv("Voter File for GIS Maps.csv")

# Counties in USA
counties <- map_data("county")

# Keep only the counties of Florida
fl_county <- subset(counties, region == "florida")
fl_df <- subset(counties, region == "florida")

# Rename the sixth column of the map data so it can serve as the join key
names(fl_county)[6] <- "county_name"

fl_base <- ggplot(data = fl_df, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(color = "black", fill = "gray")

# fl_base + theme_nothing() +
#   geom_polygon(data = fl_county, fill = NA, color = "black") +
#   geom_polygon(color = "black", fill = NA)  # get the state border back on top

# County name
countyname <- read.csv("fl_county_name.csv")
dat2 <- data.table(voterfile1)

# Number of voter-file rows per county
dat2[, nr_respondents := .N, .(county)]

dat2 <- dat2[, c("county", "nr_respondents")]

# Merge the dataset and county name, keeping one row per county.
# Without unique() every polygon vertex is joined to every voter-file row of
# its county and the duplicates are only thrown away afterwards.
dat2_new <- unique(merge(dat2, countyname, by = "county"))


# inner_join() keeps the polygon vertices of fl_county in drawing order
flcopa <- inner_join(fl_county, dat2_new, by = "county_name")
flcopa <- flcopa[!duplicated(flcopa$order), ]


# Blank out axes, grid lines and the panel border so only the map remains
ditch_the_axes <- theme(
  axis.title = element_blank(),
  axis.text = element_blank(),
  axis.ticks = element_blank(),
  axis.line = element_blank(),
  panel.grid = element_blank(),
  panel.border = element_blank()
)

library(reshape2)

# County heat map of the voter file. The fill layer uses flcopa (counties
# present in the voter file); the outline layer uses fl_county, the plot's
# default data.
map_voterfile <- ggplot(data = fl_county, aes(x = long, y = lat, group = group)) +
  geom_polygon(data = flcopa, aes(fill = nr_respondents)) +
  geom_polygon(color = "black", fill = NA, size = 0.2) +
  theme_bw() +
  ditch_the_axes +
  labs(fill = "Florida Active Registered Voters by County") +
  theme(legend.position = "bottom", legend.justification = "center") +
  scale_fill_continuous(
    high = "black",
    low = "white",
    # labels = comma,
    guide = guide_colorbar(
      direction = "horizontal",
      barheight = unit(2, "mm"),
      barwidth = unit(50, "mm"),
      draw.ulim = FALSE,
      title.position = "top",
      # centre the title and the tick labels
      title.hjust = 0.5,
      label.hjust = 0.5
    )
  )
map_voterfile


